Clusterização dos estabelecimentos por semelhança de atributos
yelp_bz <- yelp_bz_raw %>%
select_if(~is.numeric(.)) %>%
mutate_all(~replace(., is.na(.), 0))
glimpse(yelp_bz)
## Rows: 14,962
## Columns: 34
## $ latitude <dbl> 43.62661, 43.64041, 43.61129, 43.70441, 43…
## $ longitude <dbl> -79.50209, -79.39058, -79.55687, -79.37511…
## $ review_count <dbl> 4, 81, 3, 3, 4, 6, 10, 52, 14, 4, 4, 11, 7…
## $ stars <dbl> 2.0, 2.5, 1.0, 5.0, 3.0, 4.5, 3.0, 2.5, 3.…
## $ AcceptsInsurance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ AgesAllowed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Alcohol <dbl> 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ BYOB <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ BikeParking <dbl> 2, 1, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2, 2, 0, …
## $ BusinessAcceptsCreditCards <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ByAppointmentOnly <dbl> 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Caters <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, …
## $ CoatCheck <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Corkage <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ DogsAllowed <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ DriveThru <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ GoodForDancing <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ GoodForKids <dbl> 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, …
## $ HappyHour <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ HasTV <dbl> 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ NoiseLevel <dbl> 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ OutdoorSeating <dbl> 0, 2, 0, 0, 0, 0, 2, 1, 2, 0, 0, 2, 1, 1, …
## $ RestaurantsAttire <dbl> 0, 3, 0, 0, 0, 0, 3, 3, 3, 0, 0, 3, 3, 0, …
## $ RestaurantsDelivery <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, …
## $ RestaurantsGoodForGroups <dbl> 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, …
## $ RestaurantsPriceRange2 <dbl> 2, 2, 0, 0, 2, 0, 2, 1, 2, 0, 0, 1, 2, 0, …
## $ RestaurantsReservations <dbl> 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, …
## $ RestaurantsTableService <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ RestaurantsTakeOut <dbl> 0, 2, 0, 0, 0, 0, 2, 2, 2, 0, 0, 2, 2, 0, …
## $ Smoking <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ WheelchairAccessible <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, …
## $ WiFi <dbl> 0, 3, 0, 0, 0, 0, 3, 3, 3, 0, 0, 3, 3, 0, …
## $ tips_counter_bz <dbl> 0, 14, 0, 0, 0, 1, 4, 5, 5, 0, 0, 6, 0, 1,…
## $ total_compliments_bz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
Será aplicada uma análise de componentes principais para entender a variabilidade da nota dos estabelecimentos considerando seus atributos.
rec_pca <- recipe(stars ~ ., yelp_bz) %>%
update_role(contains('id'), new_role = 'id') %>%
#step_date(date_rv, yelping_since_usr, features = c("dow", "month","year")) %>%
#step_other(categories, threshold = 0.005) %>%
#step_other(postal_code, threshold = 0.01) %>%
#step_dummy(all_nominal(), -'business_id',-'user_id',-'name_bz') %>%
step_normalize(all_numeric(), -all_outcomes()) %>%
step_pca(all_numeric(), -all_outcomes()) %>%
step_naomit(all_numeric()) %>%
prep()
yelp_bz_pca <- juice(rec_pca)
variance_pct <- rec_pca$steps[[2]]$res
(cumsum(variance_pct$sdev^2) / sum(variance_pct$sdev^2))
## [1] 0.2795283 0.3569530 0.4181779 0.4760438 0.5246121 0.5650128 0.6019585
## [8] 0.6384455 0.6714870 0.7020952 0.7307504 0.7590915 0.7806310 0.8013364
## [15] 0.8214466 0.8408425 0.8588443 0.8747742 0.8894284 0.9032762 0.9169327
## [22] 0.9301989 0.9408237 0.9507912 0.9598340 0.9678033 0.9751966 0.9818828
## [29] 0.9879589 0.9930369 0.9974065 1.0000000 1.0000000
fviz_eig(variance_pct, addlabels = TRUE) +
labs(x = "Componente Principal",
y = "Percentual explicado da variância")
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
Mais de 50% da variabilidade é explicada pelas 5 primeiras componentes, que são compostas da seguinte forma:
tidy_pca <- tidy(rec_pca, 2)
tidy_pca %>%
filter(component %in% paste0("PC", 1:6)) %>%
group_by(component) %>%
top_n(15, abs(value)) %>%
ungroup() %>%
mutate(terms = reorder_within(terms, abs(value), component)) %>%
ggplot(aes(abs(value), terms, fill = value > 0)) +
geom_col() +
facet_wrap(~component, scales = "free_y") +
scale_y_reordered() +
labs(
x = "Valor absoluto da contribuição",
y = NULL, fill = "Valor > 0")
Na primeira componente, os pesos são igualmente distribuídos, o que indica que todos os atributos tem impacto semelhante na maior parte da variabilidade.
Pela segunda componente, no entanto, observa-se que a existência de um local para deixar o casaco, ser permitido fumar e ser um bom local para dançar são mais relevante, assim como a localização (PC6). Além disso, cobrança de rolha e necessidade de levar a bebida também são drivers importantes, pois aparecem em mais de uma componente.
variance_pct %>%
fviz_pca_var(axes = c(1,2), col.var="contrib", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))
Os maiores contrastes são entre a modalidade de atendimento dos restaurante: Apenas delivery ou com reservas.
Classificação dos usuários conforme o perfil.
Será utilizada a clusterização k-médias por conta da quantidade de características dos usários presentes na base. Também foi tentada a aplicação de uma clusterização hierárquica, mas os resultados obtidos não foram tão interpretáveis como os seguintes.
set.seed(123)
glimpse(yelp_users)
## Rows: 119,792
## Columns: 23
## $ user_id <chr> "-4Anvj46CWf57KWI9UQDLg", "-BUamlG3H-7yqpAl1p-msw"…
## $ average_stars <dbl> 3.50, 1.50, 3.00, 3.56, 3.00, 4.00, 4.17, 3.57, 4.…
## $ compliment_cool <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_cute <dbl> 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_funny <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_hot <dbl> 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 2, 0…
## $ compliment_list <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_more <dbl> 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_note <dbl> 0, 0, 1, 0, 0, 0, 0, 16, 0, 1, 0, 0, 0, 0, 0, 1, 0…
## $ compliment_photos <dbl> 0, 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_plain <dbl> 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ compliment_profile <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_writer <dbl> 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cool <dbl> 2, 0, 1, 0, 1, 0, 0, 1562, 2, 1, 1, 9, 0, 5, 0, 9,…
## $ elite_count <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ fans <dbl> 1, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ friends_count <dbl> 1, 16, 15, 27, 1, 1, 1, 338, 59, 6, 10, 100, 8, 1,…
## $ funny <dbl> 0, 0, 1, 0, 0, 0, 0, 1266, 3, 1, 4, 0, 1, 1, 1, 5,…
## $ review_count_usr <dbl> 2, 2, 4, 27, 2, 6, 6, 66, 28, 3, 8, 37, 4, 20, 1, …
## $ useful <dbl> 2, 0, 1, 5, 1, 3, 16, 1683, 12, 1, 2, 30, 4, 30, 0…
## $ year_since <dbl> 2016, 2016, 2011, 2019, 2014, 2017, 2014, 2019, 20…
## $ tips_counter <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 19, 0, 0, 0, 0, 0, 2, 0…
## $ total_compliments <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
yelp_pad <- yelp_users %>%
select(-user_id) %>%
scale()
kclusts <- tibble(k = 1:30) %>%
mutate(kclust = map(k, ~kmeans(yelp_pad, .x)),
tidied = map(kclust, tidy),
glanced = map(kclust, glance),
augmented = map(kclust, augment, yelp_pad)
)
clusters <- kclusts %>%
unnest(cols = c(tidied))
assignments <- kclusts %>%
unnest(cols = c(augmented))
clusterings <- kclusts %>%
unnest(cols = c(glanced))
### Cotovelo
clusterings %>%
ggplot(aes(k, tot.withinss)) +
geom_point(size = 3) +
geom_line() +
labs(y = "total within sum of squares", x = "k") +
scale_x_continuous(breaks = 1:30)
Pelo gráfico do cotovelo, poderiam ser selecionado um número de clusters (k) de 11 a 17, a seguir é possível ver uma comparacão em relação às dicas.
#k-means
assignments %>%
filter(k %in% paste0(10:20)) %>%
ggplot(aes(x = tips_counter, y = total_compliments)) +
geom_point(aes(color = .cluster), alpha = 0.5) +
facet_wrap(~ k, nrow = 3)
Para a classificação final dos usuário, será feita novamente a clusterização, mas considerando apenas o k ideal.
set.seed(123)
kmeans_usr <- kmeans(yelp_pad, 11)
yelp_usr_cluster <- yelp_users %>%
mutate(cluster_usr = kmeans_usr$cluster)
glimpse(yelp_usr_cluster)
## Rows: 119,792
## Columns: 24
## $ user_id <chr> "-4Anvj46CWf57KWI9UQDLg", "-BUamlG3H-7yqpAl1p-msw"…
## $ average_stars <dbl> 3.50, 1.50, 3.00, 3.56, 3.00, 4.00, 4.17, 3.57, 4.…
## $ compliment_cool <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_cute <dbl> 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_funny <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_hot <dbl> 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 2, 0…
## $ compliment_list <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_more <dbl> 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_note <dbl> 0, 0, 1, 0, 0, 0, 0, 16, 0, 1, 0, 0, 0, 0, 0, 1, 0…
## $ compliment_photos <dbl> 0, 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_plain <dbl> 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ compliment_profile <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_writer <dbl> 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cool <dbl> 2, 0, 1, 0, 1, 0, 0, 1562, 2, 1, 1, 9, 0, 5, 0, 9,…
## $ elite_count <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ fans <dbl> 1, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ friends_count <dbl> 1, 16, 15, 27, 1, 1, 1, 338, 59, 6, 10, 100, 8, 1,…
## $ funny <dbl> 0, 0, 1, 0, 0, 0, 0, 1266, 3, 1, 4, 0, 1, 1, 1, 5,…
## $ review_count_usr <dbl> 2, 2, 4, 27, 2, 6, 6, 66, 28, 3, 8, 37, 4, 20, 1, …
## $ useful <dbl> 2, 0, 1, 5, 1, 3, 16, 1683, 12, 1, 2, 30, 4, 30, 0…
## $ year_since <dbl> 2016, 2016, 2011, 2019, 2014, 2017, 2014, 2019, 20…
## $ tips_counter <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 19, 0, 0, 0, 0, 0, 2, 0…
## $ total_compliments <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ cluster_usr <int> 3, 7, 10, 9, 3, 9, 4, 9, 4, 10, 3, 10, 4, 3, 7, 9,…
Pelo gráfico, observa-se claramente a divisão dos usuários em relação ao tempo na plataforma, a nota média e a quantidade de fãs.
plot_ly(yelp_usr_cluster, x = ~year_since,
y = ~average_stars,
z = ~fans, color = ~cluster_usr,
text = ~paste('Cluster: ', cluster_usr)) %>%
add_markers() %>%
layout(scene = list(xaxis = list(title = 'No Yelp desde'),
yaxis = list(title = 'Nota Média'),
zaxis = list(title = 'Quantidade de fãs')))
yelp_usr_cluster %>%
select(user_id, cluster_usr) %>%
write.csv(file = "output/usr_cluster.csv")
Como próximos passos, seria interessante entnder melhor as características de cada cluster. Para classificar usuário que não estão na base, será utilizado um modelo de árvore para fazer a classificação. O intuito é obter de uma forma rápida o cluster de um novo usuário.
user_cluster_tree <- yelp_usr_cluster %>%
select(-user_id) %>%
rpart(cluster_usr ~ ., data = .)
plot_arvore <- as.party(user_cluster_tree)
#plot(plot_arvore)
yelp_rv <- yelp_raw %>%
#mutate(line = row_number()) %>%
select(-'year_rv') %>%
mutate(stars_rv = replace(stars_rv >= 4,1,0)) %>%
select_if(is.numeric) #%>% sample_frac(0.50)
glimpse(yelp_rv)
## Rows: 219,462
## Columns: 58
## $ average_stars <dbl> 1.00, 3.21, 1.50, 3.99, 4.23, 3.64, 3.67, …
## $ compliment_cool <dbl> 0, 0, 0, 4, 2, 1, 0, 3, 0, 0, 0, 0, 1, 0, …
## $ compliment_cute <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_funny <dbl> 0, 0, 0, 4, 2, 1, 0, 3, 0, 0, 0, 0, 1, 0, …
## $ compliment_hot <dbl> 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, …
## $ compliment_list <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_more <dbl> 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, …
## $ compliment_note <dbl> 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_photos <dbl> 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_plain <dbl> 0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 1, 0, 1, 0, …
## $ compliment_profile <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_writer <dbl> 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, …
## $ cool <dbl> 0, 2, 0, 35, 45, 1, 0, 37, 0, 0, 17, 0, 8,…
## $ elite_count <dbl> 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, …
## $ fans <dbl> 0, 0, 0, 1, 7, 0, 0, 9, 0, 0, 4, 0, 1, 0, …
## $ friends_count <dbl> 12, 1, 1, 44, 31, 8, 1, 147, 1, 20, 34, 1,…
## $ funny <dbl> 0, 1, 0, 18, 27, 0, 1, 20, 0, 0, 2, 0, 9, …
## $ review_count_usr <dbl> 1, 19, 2, 105, 279, 14, 3, 190, 2, 4, 38, …
## $ useful <dbl> 0, 1, 1, 90, 99, 1, 1, 100, 1, 1, 19, 1, 5…
## $ year_since <dbl> 2017, 2016, 2017, 2011, 2017, 2017, 2015, …
## $ tips_counter <dbl> 0, 0, 0, 1, 1, 5, 1, 9, 0, 1, 0, 0, 5, 0, …
## $ total_compliments <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cluster_usr <dbl> 7, 3, 7, 10, 9, 9, 3, 4, 3, 7, 9, 7, 4, 3,…
## $ stars_rv <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, …
## $ latitude <dbl> 43.64041, 43.64041, 43.64041, 43.64041, 43…
## $ longitude <dbl> -79.39058, -79.39058, -79.39058, -79.39058…
## $ review_count <dbl> 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81…
## $ stars <dbl> 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.…
## $ AcceptsInsurance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ AgesAllowed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Alcohol <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ BYOB <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ BikeParking <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ BusinessAcceptsCreditCards <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ByAppointmentOnly <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Caters <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ CoatCheck <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Corkage <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ DogsAllowed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ DriveThru <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ GoodForDancing <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ GoodForKids <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ HappyHour <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ HasTV <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ NoiseLevel <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ OutdoorSeating <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ RestaurantsAttire <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, …
## $ RestaurantsDelivery <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ RestaurantsGoodForGroups <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ RestaurantsPriceRange2 <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ RestaurantsReservations <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ RestaurantsTableService <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ RestaurantsTakeOut <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ Smoking <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ WheelchairAccessible <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ WiFi <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, …
## $ tips_counter_bz <dbl> 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14…
## $ total_compliments_bz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
split <- initial_split(yelp_rv, prop = 0.8 , strata = stars_rv)
train_val <- training(split)
split_val <- initial_split(train_val, prop = 0.5, strata = stars_rv)
yelp_train <- training(split_val)
yelp_val <- testing(split_val)
yelp_test <- testing(split)
mean <- yelp_train %>%
select(-stars_rv) %>%
apply(., 2, mean)
std <- yelp_train %>%
select(-stars_rv) %>%
apply(., 2, sd)
x_train <- yelp_train %>%
select(-stars_rv) %>%
scale(center = mean, scale = std) %>%
as.matrix()
dim(x_train)
## [1] 87786 57
y_train <- yelp_train %>%
select(stars_rv) %>%
as.matrix()
x_val <- yelp_val %>%
select(-stars_rv) %>%
scale(center = mean, scale = std) %>%
as.matrix()
dim(x_val)
## [1] 87785 57
y_val <- yelp_val %>%
select(stars_rv) %>%
data.matrix()
dim(x_val)
## [1] 87785 57
x_test <- yelp_test %>%
select(-stars_rv) %>%
scale(center = mean, scale = std) %>%
as.matrix()
dim(x_test)
## [1] 43891 57
y_test <- yelp_test %>%
select(stars_rv) %>%
data.matrix()
rm(yelp_nn)
yelp_nn <- keras_model_sequential() %>%
layer_dense(units = 30, activation = "tanh", input_shape = ncol(x_train)) %>%
layer_dropout(rate = 0.5) %>%
layer_dense(units = 16, activation = "relu") %>%
#layer_dropout(rate = 0.5) %>%
layer_dense(units = 16, activation = "relu") %>%
#layer_dense(units = 6, activation = "softmax")
layer_dense(units = 1, activation = "sigmoid")
yelp_nn %>%
compile(optimizer = "rmsprop",
#loss = "sparse_categorical_crossentropy",
loss = "binary_crossentropy",
metrics = c("accuracy"))
history <- yelp_nn %>%
fit(x_train, y_train,
epochs = 40, batch_size = 512,
validation_data = list(x_val, y_val))
plot(history)
## `geom_smooth()` using formula 'y ~ x'
#keras::get_weights(yelp_nn)
(results <- yelp_nn %>% evaluate(x_test, y_test))
## loss accuracy
## 0.4629365 0.7722768
Foi adicionada uma camada de dropout na rede neural, para diminuir o overfit do modelo. Observa-se que foi efeciente, pois a perda da base de validação não ultrappassa a perda da base de treino.
tibble(observado = factor(y_test)) %>%
bind_cols(data.frame(prob = predict(yelp_nn, as.matrix(x_test)))) %>%
roc_auc(observado, prob)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 roc_auc binary 0.837
tibble(observado = factor(y_test)) %>%
bind_cols(data.frame(prob = predict(yelp_nn, as.matrix(x_test)))) %>%
roc_curve(observado, prob) %>%
autoplot()
Pelo gráfico, observa-se que o modelo atingiu um desempenho bom na base de teste.
Criação de um usuário e definição de seu cluster
glimpse(yelp_usr_cluster)
## Rows: 119,792
## Columns: 24
## $ user_id <chr> "-4Anvj46CWf57KWI9UQDLg", "-BUamlG3H-7yqpAl1p-msw"…
## $ average_stars <dbl> 3.50, 1.50, 3.00, 3.56, 3.00, 4.00, 4.17, 3.57, 4.…
## $ compliment_cool <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_cute <dbl> 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_funny <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_hot <dbl> 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 2, 0…
## $ compliment_list <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_more <dbl> 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_note <dbl> 0, 0, 1, 0, 0, 0, 0, 16, 0, 1, 0, 0, 0, 0, 0, 1, 0…
## $ compliment_photos <dbl> 0, 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_plain <dbl> 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ compliment_profile <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_writer <dbl> 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cool <dbl> 2, 0, 1, 0, 1, 0, 0, 1562, 2, 1, 1, 9, 0, 5, 0, 9,…
## $ elite_count <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ fans <dbl> 1, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ friends_count <dbl> 1, 16, 15, 27, 1, 1, 1, 338, 59, 6, 10, 100, 8, 1,…
## $ funny <dbl> 0, 0, 1, 0, 0, 0, 0, 1266, 3, 1, 4, 0, 1, 1, 1, 5,…
## $ review_count_usr <dbl> 2, 2, 4, 27, 2, 6, 6, 66, 28, 3, 8, 37, 4, 20, 1, …
## $ useful <dbl> 2, 0, 1, 5, 1, 3, 16, 1683, 12, 1, 2, 30, 4, 30, 0…
## $ year_since <dbl> 2016, 2016, 2011, 2019, 2014, 2017, 2014, 2019, 20…
## $ tips_counter <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 19, 0, 0, 0, 0, 0, 2, 0…
## $ total_compliments <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ cluster_usr <int> 3, 7, 10, 9, 3, 9, 4, 9, 4, 10, 3, 10, 4, 3, 7, 9,…
rm(user)
compliment_max <- 50
user <- tibble(user_id = 'random_user',
average_stars = round(runif(1, 1.0, 5),2),
compliment_cool = ceiling(runif(1,0, compliment_max)),
compliment_cute = ceiling(runif(1,0, compliment_max)),
compliment_funny = ceiling(runif(1,0, compliment_max)),
compliment_hot = ceiling(runif(1,0, compliment_max)),
compliment_list = ceiling(runif(1,0, compliment_max)),
compliment_more = ceiling(runif(1,0, compliment_max)),
compliment_note = ceiling(runif(1,0, compliment_max)),
compliment_photos = ceiling(runif(1,0, compliment_max)),
compliment_plain = ceiling(runif(1,0, compliment_max)),
compliment_profile = ceiling(runif(1,0, compliment_max)),
compliment_writer = ceiling(runif(1,0, compliment_max)),
cool = ceiling(runif(1,0, compliment_max)),
elite_count = 0,
fans = ceiling(runif(1,0, compliment_max)),
friends_count = ceiling(runif(1,0, compliment_max)),
funny = ceiling(runif(1,0, compliment_max)),
review_count_usr = ceiling(runif(1,0,compliment_max)),
useful = ceiling(runif(1,0, compliment_max)),
year_since = ceiling(runif(1,2004, 2019)),
tips_counter = ceiling(runif(1,0, compliment_max)),
total_compliments = ceiling(runif(1,0, compliment_max))
)
## criação aleatória do número de anos que o usuário foi elite
user$elite_count <- ceiling(runif(1,0, (2020-user$year_since)))
#encontra o número do cluster em que o usuário se encaixa
user$cluster_usr <- user_cluster_tree %>%
predict(user) %>%
ceiling()
glimpse(user)
## Rows: 1
## Columns: 24
## $ user_id <chr> "random_user"
## $ average_stars <dbl> 4.07
## $ compliment_cool <dbl> 14
## $ compliment_cute <dbl> 17
## $ compliment_funny <dbl> 2
## $ compliment_hot <dbl> 14
## $ compliment_list <dbl> 13
## $ compliment_more <dbl> 19
## $ compliment_note <dbl> 37
## $ compliment_photos <dbl> 49
## $ compliment_plain <dbl> 46
## $ compliment_profile <dbl> 31
## $ compliment_writer <dbl> 24
## $ cool <dbl> 1
## $ elite_count <dbl> 7
## $ fans <dbl> 21
## $ friends_count <dbl> 25
## $ funny <dbl> 7
## $ review_count_usr <dbl> 16
## $ useful <dbl> 32
## $ year_since <dbl> 2010
## $ tips_counter <dbl> 15
## $ total_compliments <dbl> 24
## $ cluster_usr <dbl> 8
# seleção aleatória de estabelecimentos e notas atribuídas a cada um baseado no número de reviews
n_reviews <- user$review_count_usr
reviewed_usr <- tibble(business_id = sample(yelp_bz_raw$business_id, n_reviews), #seleçao aleatória de estabelecimentos
stars_rv = ceiling(runif(n_reviews, 1.0, 5)),
year_rv = ceiling(runif(n_reviews, 2009, 2019)),
)
user_hist <- user %>%
bind_rows(replicate(n_reviews-1, user, simplify = FALSE)) %>% #replica as informações do usuário
bind_cols(reviewed_usr) %>% #junta os estabelecimentos e notas dadas
left_join(., yelp_bz_raw, by = 'business_id') #junta as informações dos estabelecimentos
glimpse(user_hist)
## Rows: 16
## Columns: 63
## $ user_id <chr> "random_user", "random_user", "random_user…
## $ average_stars <dbl> 4.07, 4.07, 4.07, 4.07, 4.07, 4.07, 4.07, …
## $ compliment_cool <dbl> 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14…
## $ compliment_cute <dbl> 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17…
## $ compliment_funny <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ compliment_hot <dbl> 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14…
## $ compliment_list <dbl> 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13…
## $ compliment_more <dbl> 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19…
## $ compliment_note <dbl> 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37…
## $ compliment_photos <dbl> 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49…
## $ compliment_plain <dbl> 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46…
## $ compliment_profile <dbl> 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31…
## $ compliment_writer <dbl> 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24…
## $ cool <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ elite_count <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, …
## $ fans <dbl> 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21…
## $ friends_count <dbl> 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25…
## $ funny <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, …
## $ review_count_usr <dbl> 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16…
## $ useful <dbl> 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32…
## $ year_since <dbl> 2010, 2010, 2010, 2010, 2010, 2010, 2010, …
## $ tips_counter <dbl> 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15…
## $ total_compliments <dbl> 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24…
## $ cluster_usr <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, …
## $ business_id <chr> "r-kj-kBSKFKh0sM8EVX8AA", "H7rpWv02D6WTu6I…
## $ stars_rv <dbl> 2, 4, 5, 4, 3, 5, 3, 3, 5, 5, 2, 5, 3, 5, …
## $ year_rv <dbl> 2016, 2011, 2016, 2018, 2019, 2014, 2011, …
## $ categories <chr> "Accessories, Women's Clothing, Men's Clot…
## $ latitude <dbl> 43.64127, 43.65943, 43.77336, 43.64869, 43…
## $ longitude <dbl> -79.43377, -79.38252, -79.49302, -79.38544…
## $ name <chr> "Frances Watson", "Bed Bath and Beyond", "…
## $ review_count <dbl> 3, 28, 41, 3, 4, 4, 16, 11, 3, 42, 351, 44…
## $ stars <dbl> 5.0, 2.5, 3.0, 3.0, 4.0, 2.0, 4.0, 2.5, 2.…
## $ AcceptsInsurance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ AgesAllowed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Alcohol <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, …
## $ BYOB <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ BikeParking <dbl> 2, 2, 2, 0, 2, 0, 0, 2, 0, 2, 2, 2, 0, 0, …
## $ BusinessAcceptsCreditCards <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, …
## $ ByAppointmentOnly <dbl> 1, 0, 0, 0, 1, 0, 0, 2, 0, 1, 0, 0, 0, 1, …
## $ Caters <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, …
## $ CoatCheck <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Corkage <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ DogsAllowed <dbl> 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ DriveThru <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ GoodForDancing <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ GoodForKids <dbl> 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 1, 0, 2, 0, …
## $ HappyHour <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, …
## $ HasTV <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, …
## $ NoiseLevel <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, …
## $ OutdoorSeating <dbl> 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, …
## $ RestaurantsAttire <dbl> 0, 0, 1, 0, 1, 0, 3, 3, 0, 0, 1, 0, 0, 0, …
## $ RestaurantsDelivery <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, …
## $ RestaurantsGoodForGroups <dbl> 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, …
## $ RestaurantsPriceRange2 <dbl> 3, 2, 2, 0, 2, 0, 0, 3, 0, 1, 3, 2, 0, 0, …
## $ RestaurantsReservations <dbl> 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 1, 0, …
## $ RestaurantsTableService <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, …
## $ RestaurantsTakeOut <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, …
## $ Smoking <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ WheelchairAccessible <dbl> 1, 0, 0, 0, 2, 0, 2, 1, 0, 0, 0, 2, 0, 0, …
## $ WiFi <dbl> 0, 0, 1, 0, 1, 0, 3, 3, 0, 0, 1, 0, 0, 0, …
## $ tips_counter_bz <dbl> 0, 6, 7, 0, 4, 2, 0, 0, 1, 1, 37, 3, 4, 0,…
## $ total_compliments_bz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
recomm_f <- function(user, reviewed_usr){
to_go <- yelp_raw %>%
filter(stars_rv >= 4) %>%
filter(cluster_usr == user$cluster_usr) %>%
#filter(cluster_usr == user$cluster) %>%
select(business_id) %>%
distinct()
n_go <- nrow(to_go)
#filtra todos os estabelecimentos do cluster do usuário e junta as informações para modelagem
to_review <- user %>%
bind_rows(replicate(n_go-1, user, simplify = FALSE)) %>% #replica as informações do usuário
bind_cols(to_go) %>% #junta os estabelecimentos e notas dadas
left_join(., yelp_bz_raw, by = 'business_id')
#prepara a base para o modelo
user_x_test <- to_review %>%
select_if(is.numeric) %>%
#select(-stars_rv) %>%
scale(center = mean, scale = std) %>%
as.matrix()
#aplica a base no modelo
predictions <- as_tibble(predict(yelp_nn, user_x_test))
#seleciona as principais recomendações
recommendation <- to_review %>%
bind_cols(pred = predictions) %>%
anti_join(., reviewed_usr, by = 'business_id') %>%
filter(V1 > 0.5)
}
Para validar as recomendações, é feito o teste também com um usuário aleatório da base de teste.
n <- ceiling(runif(1,1,nrow(yelp_test)))
(random_user <- yelp_raw[n,]$user_id)
## [1] "UuH7pyPsm4E5bDfXaQJ9dg"
user2 <- yelp_usr_cluster %>%
filter(user_id == random_user)
reviewed_usr2 <- yelp_raw %>%
filter(user_id == random_user)
rec_user <- recomm_f(user2,reviewed_usr2)
#top 5 recomendações
rec_user %>%
top_n(5, V1) %>%
arrange(V1) %>%
mutate(rank = as.factor(row_number())) %>%
ggplot(aes(x = V1, y = name, fill = rank)) +
geom_col() +
labs(x = "Probabilidade de avaliação positiva",
y = 'Recomendação')
top_5 <- rec_user %>%
top_n(5, V1) %>%
arrange(-V1) %>%
mutate(rank = as.factor(row_number()))
top_5 %>%
select(name, categories, V1)
## # A tibble: 5 x 3
## name categories V1
## <chr> <chr> <dbl>
## 1 Duotherapy Physical Therapy, Health & Medical, Massage Therapy 0.981
## 2 Helping Hands Doula Sleep Specialists, Health & Medical, Doulas, Massa… 0.980
## 3 Yonge Elmwood Pharm… Health & Medical, Pharmacy 0.980
## 4 Lady Amaze Health P… Waxing, Hair Removal, Massage, Beauty & Spas, Refl… 0.980
## 5 Eglinton Way Massag… Beauty & Spas, Massage 0.980
qmplot(longitude, latitude, data = top_5,
maptype = "toner-background",
color = rank,
size = V1)
## Using zoom = 13...
## Source : http://tile.stamen.com/terrain/13/2288/2984.png
## Source : http://tile.stamen.com/terrain/13/2289/2984.png
## Source : http://tile.stamen.com/terrain/13/2290/2984.png
## Source : http://tile.stamen.com/terrain/13/2291/2984.png
## Source : http://tile.stamen.com/terrain/13/2288/2985.png
## Source : http://tile.stamen.com/terrain/13/2289/2985.png
## Source : http://tile.stamen.com/terrain/13/2290/2985.png
## Source : http://tile.stamen.com/terrain/13/2291/2985.png
## Source : http://tile.stamen.com/terrain/13/2288/2986.png
## Source : http://tile.stamen.com/terrain/13/2289/2986.png
## Source : http://tile.stamen.com/terrain/13/2290/2986.png
## Source : http://tile.stamen.com/terrain/13/2291/2986.png
## Source : http://tile.stamen.com/terrain/13/2288/2987.png
## Source : http://tile.stamen.com/terrain/13/2289/2987.png
## Source : http://tile.stamen.com/terrain/13/2290/2987.png
## Source : http://tile.stamen.com/terrain/13/2291/2987.png
## Source : http://tile.stamen.com/terrain/13/2288/2988.png
## Source : http://tile.stamen.com/terrain/13/2289/2988.png
## Source : http://tile.stamen.com/terrain/13/2290/2988.png
## Source : http://tile.stamen.com/terrain/13/2291/2988.png
#categorias
rec_user %>%
select(name, categories, V1) %>%
unnest_tokens(category, categories) %>%
filter(category %in% c('restaurants','bars','nightlife')) %>%
group_by(category) %>%
mutate(pred_avg = mean(V1)) %>%
ungroup() %>%
unique() %>%
top_n(n = 5, wt = V1) %>%
mutate(name = reorder_within(name, -V1, category)) %>%
ggplot(aes(V1, name, fill = category)) +
geom_col(show.legend = TRUE) +
facet_wrap(~category, scales = 'free') +
scale_x_continuous() +
scale_y_reordered() +
labs(x = 'Probabilidade de boa avaliação')